Introduction

This document provides all the code necessary to reproduce the analysis, simulations and figures in Harrison (2020) A Brief Introduction to the Analysis of Time Series Data from Biologger Studies.

Setup

#Libraries
    library(stats)
    library(ggplot2)
    library(cowplot)
## 
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
##   default ggplot2 theme anymore. To recover the previous
##   behavior, execute:
##   theme_set(theme_cowplot())
## ********************************************************
    library(simts)
    library(rphylopic)
    library(tidybayes)
    library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
    library(datasets)
    library(parallel)
    library(tidyr)
    library(nlme)
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:forecast':
## 
##     getResponse
    library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:nlme':
## 
##     collapse
## The following object is masked from 'package:simts':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#Global Plot Options
  plotopts<-theme(axis.text=element_text(size=20),axis.title = element_text(size=18),strip.text = element_text(size=20))

#Multi-Core
    options(mc.cores=detectCores())

Section 1: Using the Wrong Tests

1.1 Setup

    set.seed(123)
    
  #Define Values  
    n = 100                               # process length
    phi=0.5
    sigma2=0.5
    
    ts_t2<-numeric(10000)
    glm_p<-matrix(NA,ncol=2,nrow=10000)
    gls_p<-numeric(10000)

1.2 Simulations

    for(k in 1:10000){
  #Generate Time Series  
    timedata=data.frame(yval=c(gen_gts(n, AR1(sigma2 = sigma2,phi=phi)),gen_gts(n, AR1(sigma2 = sigma2,phi=phi))),id=rep(c("id1","id2"),each=n),time=rep(seq(n),2))
  #T Test For Difference  
    ts_t2[k]<-with(timedata,t.test(yval~id))$p.value
  #Ordinary GLM  
   glm_p[k,1]<-with(timedata,anova(glm(yval~id+time),glm(yval~time),test="F"))["Pr(>F)"][1][2,1]
   glm_p[k,2]<-with(timedata,anova(glm(yval~id+time),glm(yval~id),test="F"))["Pr(>F)"][1][2,1]
  #GLS
   t1 <- gls(yval ~ time,
             na.action = na.omit, data = timedata,
             correlation = corAR1(form = ~ time|id),method="ML")
   tnull<-gls(yval ~ 1,
              na.action = na.omit, data = timedata,
              correlation = corAR1(form = ~ time|id),method="ML")
   gls_p[k]<-anova(t1,tnull)[[9]][2]
    }

1.3 False Positive Rates

## T Test 
    mean(ts_t2<=0.05)
## [1] 0.2558
# GLM Effect of ID
    mean(glm_p[,1]<=0.05)
## [1] 0.258
# GLM Effect of Time    
    mean(glm_p[,2]<=0.05)
## [1] 0.2469
#GLS Effect of Time     
    mean(gls_p<=0.05)
## [1] 0.0539

1.4 Plotting

  ## Example Time Series Data 
      time_eg1<-data.frame(tseries=c(gen_gts(n, AR1(sigma2 = sigma2,phi=phi)),gen_gts(n, AR1(sigma2 = sigma2,phi=phi))),time=rep(seq(n),2),seriesval=rep(c("Individual  1","Individual 2"),each=n))
  ## Lizard Picture 
      lizard <- name_search(text = "Velociraptor", options = "namebankID")[[1]]
      lizard_img<-image_data(name_images(uuid = lizard$uid[1])$same[[4]]$uid,size=1024)[[1]]
      
  ##  Example Time Series Plot 
      tplot1<-ggplot(time_eg1,aes(x=time,y=tseries)) + geom_line(color="lightblue",cex=1.5) + facet_wrap(.~seriesval,ncol=1)
      tplot2<-tplot1+ theme_bw() + plotopts + labs(y="Value",x="Time") + add_phylopic(lizard_img,x=12,y=1.6,ysize = 20,alpha=1,color="black")
      tplot2

  ## P Value Plot 
      t_data<-data.frame(vals=c(ts_t2,glm_p[,1],glm_p[,2],gls_p),test=rep(c("t test ID","glm ID", "glm Time", "gls Time"),each=length(ts_t2)))
      
      tdataplot1<-ggplot(t_data) + geom_vline(xintercept=0.05,lty="dashed") + geom_density(aes(x=vals,fill=test),alpha=0.2) + theme_bw() + facet_wrap(.~test) + labs(x="Proportion",y="Density") + plotopts
      tdataplot2<- tdataplot1+ scale_fill_brewer(palette="Set2") + guides(fill=F)  
      
      
      
  ## Combine 
      tplot_combined<-plot_grid(tplot2,tdataplot2,labels=c("A","B"),ncol=2,label_size = 20,rel_widths = c(1,2))
      tplot_combined

        #ggsave2('Wrong Test Plot.pdf',tplot_combined,width=18,height=8)
        #ggsave2('Wrong Test Plot.tiff',tplot_combined,width=18,height=8)

Section 2: Using the Wrong Time Series Model

2.2 Setup

      data(beavers)
      head(beaver1)
##   day time  temp activ
## 1 346  840 36.33     0
## 2 346  850 36.34     0
## 3 346  900 36.35     0
## 4 346  910 36.42     0
## 5 346  920 36.55     0
## 6 346  930 36.69     0
      set.seed(123)
      
  ## New Day/Time Object + Real Time Labels
      beaver1$time2<-beaver1$time
      beaver1$time2[which(beaver1$day==347)]<-beaver1$time2[which(beaver1$day==347)]+2360
      beaver1$time[1:9]<-paste0("0",beaver1$time[1:9])
      
      beaver1$time<-c(substr(beaver1$time,1,2))

2.3 Plot Beaver Body Temp Data

  ## Get Phylopic Image of Beaver 
      beav <- name_search(text = "Castor canadensis", options = "namebankID")[[1]]
      beav_img<-image_data(name_images(uuid = beav$uid[1])$same[[1]]$uid,size=1024)[[1]]
    
  ### Plot 
      ylab <- expression('Body Temperature ('*~degree*C*')')
      beav1<-ggplot(beaver1,aes(x=time2,y=temp))  + geom_line(col="blue",cex=2) + theme_bw() + plotopts + labs(y=ylab,x="Time")
      beav2<-beav1 + scale_x_continuous(breaks=c(800,1200,1600,2000,2400),labels=c("8:00","12:00","16:00","20:00","00:00"))+ add_phylopic(beav_img,x=1200,y=37.4,alpha=1,color="black",ysize=400)

2.4 Models - AR(1)

######### MODELS 
      library(nlme)
      
  ###  No Explicit Structure
      b2 <- gls(temp ~ time2,
                na.action = na.omit, data = beaver1,
                correlation = corAR1(form = ~ time2))
      summary(b2)
## Generalized least squares fit by REML
##   Model: temp ~ time2 
##   Data: beaver1 
##         AIC       BIC   logLik
##   -41.75318 -30.87919 24.87659
## 
## Correlation Structure: ARMA(1,0)
##  Formula: ~time2 
##  Parameter estimate(s):
## Phi1 
##    0 
## 
## Coefficients:
##                Value  Std.Error  t-value p-value
## (Intercept) 36.59101 0.05656552 646.8783       0
## time2        0.00015 0.00003027   5.0107       0
## 
##  Correlation: 
##       (Intr)
## time2 -0.957
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.21188190 -0.75704183 -0.03216342  0.74366254  3.49057262 
## 
## Residual standard error: 0.1755961 
## Degrees of freedom: 114 total; 112 residual
    #Residual Plot 
      E <- residuals(b2, type = "normalized")
      beaver_acf<-autoplot(acf(E),main="") + theme_bw() + plotopts
## Warning: Ignoring unknown parameters: main

2.5 Models - ARMA(1,1)

    ### Correct Structure 
      
      cs2 <- corARMA(c(0.3, -0.3), p =1, q = 1)   #arbitrary starting values 
      b3 <- gls(temp ~ time2,
                na.action = na.omit, data = beaver1,
                correlation = cs2)
      summary(b3)
## Generalized least squares fit by REML
##   Model: temp ~ time2 
##   Data: beaver1 
##         AIC       BIC   logLik
##   -180.8976 -167.3051 95.44882
## 
## Correlation Structure: ARMA(1,1)
##  Formula: ~1 
##  Parameter estimate(s):
##        Phi1      Theta1 
##  0.90038130 -0.04524174 
## 
## Coefficients:
##                Value  Std.Error   t-value p-value
## (Intercept) 36.43982 0.23862220 152.70925   0.000
## time2        0.00023 0.00012581   1.80362   0.074
## 
##  Correlation: 
##       (Intr)
## time2 -0.941
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.4338514 -0.6648389  0.2060595  0.7578094  2.8331080 
## 
## Residual standard error: 0.2125989 
## Degrees of freedom: 114 total; 112 residual
      beaver_correct_acf<-autoplot(acf( residuals(b3, type = "normalized")),main="") + theme_bw() + plotopts
## Warning: Ignoring unknown parameters: main

2.6 Combined Plot

  ## Combined Plot 
      beaver_combined<-plot_grid(beav2,beaver_acf,beaver_correct_acf,labels=c("A","B - AR(1)","C - ARMA(1,1)"),ncol=3,label_size = 20)
      beaver_combined

    #Save
      #ggsave2('Beaver Body Temp.pdf',beaver_combined,width=15,height=8)

Section 3: Replication of Random Effects

3.1 Setup

set.seed(123)

 ######### Random Effects Models 
  
    #Mean and SD Among Geese
      honk_sd<-5
      honk_mu<-40
      
    #Number of Individuals
      n_honk=seq(2,20,1)
      
    #Goose Model 
      goose_re_model=AR1(phi=0.5,sigma2=1)
      
    #Sim Parameters and Empty Matrix  
      nsims_goose=1000 #sims
      N_goose=100 # time series length
      goose_re_matrix<-list(phi=matrix(NA,ncol=nsims_goose,nrow=length(n_honk)),sigma_goose=matrix(NA,ncol=nsims_goose,nrow=length(n_honk)))

3.2 Simulations

    ## Loop Start 
      for(k in 1:length(n_honk)){
        for(j in 1:nsims_goose){
      
    ## Step 1Draw Means For Each Goose
      alphavals<-rnorm(n_honk[k],honk_mu,honk_sd)
      
    ## Step 2 Data Generation  
      dat1<-lapply(alphavals,function(x) data.frame(yval= x + gen_gts(N_goose,goose_re_model), time=seq(N_goose),id=paste0("goose",which(alphavals==x)))) %>% do.call("rbind",.)
      nrow(dat1)
   
    ## Step 3 FIT LME model 
      goose_re_ar1<-corARMA(c(0.5), p = 1, q = 0,form= ~time)
      goose_re_lme<-lme(Observed~time,data=dat1,
                                 random=~1|id, 
                                 correlation = goose_re_ar1,
                                 na.action=na.omit) 
      summary(goose_re_lme)
      
      
    ## Step 4 Store Items     
      #Store Phi   
        goose_re_matrix$phi[k,j] <- goose_re_lme$modelStruct$corStruct %>% coef(.,unconstrained=FALSE)
      #Store Among-Goose SD  
        goose_re_matrix$sigma_goose[k,j] <- as.numeric(VarCorr( goose_re_lme)[1,2])
      
      ##Loop ENds
        }
      }

3.3 Summary Stats

  ########### SUMMARY STATS
      lapply(goose_re_matrix,function(x)apply(x,1,mean))
## $phi
##  [1] 0.4914081 0.4926878 0.4990770 0.4958922 0.4952411 0.4982249 0.4974534
##  [8] 0.4972281 0.4986557 0.4978780 0.4978753 0.4968469 0.4985571 0.4976157
## [15] 0.4973656 0.4971918 0.4982403 0.4994167 0.4987167
## 
## $sigma_goose
##  [1] 3.959228 4.446424 4.598545 4.646355 4.761005 4.782489 4.808544 4.833446
##  [9] 4.844698 4.865221 4.878164 4.961628 4.970227 4.903189 4.948482 4.937908
## [17] 4.857903 4.970932 4.938241
      lapply(goose_re_matrix,function(x)apply(x,1,range))
## $phi
##           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]      [,7]
## [1,] 0.2813882 0.3099446 0.3603680 0.3752065 0.3514238 0.3773521 0.3863020
## [2,] 0.7054673 0.6449012 0.6338308 0.6095826 0.6212985 0.6146061 0.5933419
##           [,8]      [,9]     [,10]     [,11]     [,12]     [,13]     [,14]
## [1,] 0.3798789 0.3957743 0.3852803 0.4233888 0.4063442 0.4307252 0.4235077
## [2,] 0.5888990 0.5868247 0.5723121 0.5677152 0.5713301 0.5808085 0.5674554
##          [,15]     [,16]     [,17]     [,18]     [,19]
## [1,] 0.4125499 0.4282766 0.4229057 0.4279133 0.4226375
## [2,] 0.5596717 0.5742599 0.5712340 0.5513240 0.5502323
## 
## $sigma_goose
##              [,1]         [,2]       [,3]       [,4]      [,5]     [,6]
## [1,] 4.465331e-05 6.789316e-05  0.3198909  0.7535055 0.8702012 1.198644
## [2,] 2.050951e+01 1.385842e+01 13.4792230 10.1685700 9.7006870 9.379482
##          [,7]      [,8]     [,9]    [,10]    [,11]    [,12]    [,13]    [,14]
## [1,] 1.603180  1.919966 1.976203 1.891609 1.875270 2.034655 2.088641 2.442968
## [2,] 8.783969 10.276050 8.986885 8.579573 8.966518 8.003114 8.608600 8.164428
##         [,15]    [,16]    [,17]    [,18]    [,19]
## [1,] 2.649166 2.489730 2.316943 2.176838 2.555849
## [2,] 8.419126 8.125053 7.274836 7.536690 7.443445
     ########### Collapse For Plotting 
    #Phi  
      gphi<-data.frame(value=goose_re_matrix$phi  %>% t() %>% as.vector())
      gphi$samplesize<-rep(n_honk,each=nsims_goose)
      gphi$panel="Autocorrelation (phi)"
    #Sigma 
      gsigma<-data.frame(value=goose_re_matrix$sigma_goose  %>% t() %>% as.vector())
      gsigma$samplesize<-rep(n_honk,each=nsims_goose)
      gsigma$panel='Among Individual Variation (SD)'
    #Combine 
      gall<-rbind(gphi,gsigma)

3.4 Plotting

    #Dataframe for Real Value Lines  
      random_dummy <- data.frame(panel = c("Among Individual Variation (SD)", "Autocorrelation (phi)"), Z = c(5, 0.5))
         
    #Plot of Model Estimates  
      goose_random_plot1<-ggplot(gall,aes(x=samplesize,y=value)) + geom_hline(data = random_dummy, aes(yintercept = Z),lty="dashed") 
      goose_random_plot2 <- goose_random_plot1 + stat_pointinterval(point_size=5,shape=21,point_fill="white",interval_alpha=0.5) 
      goose_random_plot3<- goose_random_plot2 + facet_wrap(.~panel,ncol=2,scales = "free") + theme_bw()  + plotopts + theme(axis.text = element_text(size=25))+ labs(x=" Sample Size (Number of Individuals)",y="Value") 
      
    #Example Variation Plot 
      #Data
        alphavals_eg<-rnorm(10,honk_mu,honk_sd)
        dat_eg<-lapply(alphavals_eg,function(x) data.frame(yval= x + gen_gts(N_goose,goose_re_model), time=seq(N_goose),id=paste0("goose",which(alphavals_eg==x)))) %>% do.call("rbind",.)
        head(dat_eg)
##   Observed time     id
## 1 38.51084    1 goose1
## 2 38.75077    2 goose1
## 3 38.81272    3 goose1
## 4 36.39476    4 goose1
## 5 38.06196    5 goose1
## 6 36.16672    6 goose1
      #Honk Vignette
        sandpiper <- name_search(text = "Actitis hypoleucos", options = "namebankID")[[1]]
        sandpiper_img<-image_data(name_images(uuid = sandpiper$uid[1])$same[[1]]$uid,size=1024)[[1]]
        
      #Plot
          sandp1_eg1<-ggplot(dat_eg,aes(x=time,y=Observed)) + geom_line(aes(colour=id),cex=2) + scale_y_continuous(limits=c(32,54))
          sandp1_eg2<- sandp1_eg1 + theme_bw() + plotopts + theme(axis.text = element_text(size=25)) + labs(x="Time Step",y="Logger Value") + guides(color=F) + add_phylopic(sandpiper_img,x=10,y=52,alpha=1,color="black",ysize=18)
          sandp1_eg2
## Warning: Removed 55 row(s) containing missing values (geom_path).

      ggsave2('Random Effects Replication.pdf',goose_random_plot3,width=10,height=5)
      
      
    ### Combine 
      random_combined<-plot_grid(sandp1_eg2,goose_random_plot3,labels=c("A","B"),rel_widths = c(1,1.5),label_size = 25)
## Warning: Removed 55 row(s) containing missing values (geom_path).
      random_combined

        #ggsave2('Random Effect Combined Plot.pdf',random_combined,width=18,height = 8)
        #ggsave2('Random Effect Combined Plot.tiff',random_combined,width=18,height = 8)

Section 4: Nuisance Parameters

4.1 Setup

  # Set seed for reproducibility
    set.seed(9)
    
  # Define sample size
    n_nuisance = 100
    
  # Define beta (slope over time)
    beta = 0.02

  #Sim Parameters and Empty Matrix  
    nsims_nuisance=1000 #sims
    
  # Autocorrelation Models
    nuisance_ar1<-corARMA(c(0.5), p = 1, q = 0,form= ~time)
    nuisance_arma<-corARMA(c(0.5,-0.5,0.5), p = 2, q = 1,form= ~time)
    
  #Store Data  
    #Phi x2
    # SD x 2
    #time x 2
    #nuisance x 2
    # time p x 2
    # nuisance p x 2
    nuisance_results<-data.frame(matrix(NA,nrow=nsims_nuisance,ncol=12))
    colnames(nuisance_results)<-c("phi_ar","phi_arma","sd_ar","sd_arma","time_ar","time_arma","nuisance_ar","nuisance_arma","time_ar_p","time_arma_p","nuisance_ar_p","nuisance_arma_p")

4.2 Simulations

    ## Loop Start 

      for(k in 1:nsims_nuisance){
        
      ## Step 1Draw Means For Each Goose
        alphavals_nuisance<-rnorm(20,40,5)
        
      ## Step 2 Data Generation Using ARMA(2,1) Model + Slope for Time
        dat_nuisance<-lapply(alphavals_nuisance,function(x) data.frame(yval= x + gen_gts(ARMA(ar= c(0.8,-0.7),ma=0.7, sigma2 = 50), n = n_nuisance) , time=seq(100),nuisance=runif(n_nuisance),id=paste0("id",which(alphavals_nuisance==x)))) %>% do.call("rbind",.)
        dat_nuisance$outcome<- with(dat_nuisance,beta*time + Observed)
        
        #nrow(dat_nuisance)
      
      ## Step 3 Fit AR(1) Model
        nuisance_ar_lme<-lme(outcome~time + nuisance,data=dat_nuisance,
                          random=~1|id, 
                          correlation = nuisance_ar1,
                          na.action=na.omit) 
        summary(nuisance_ar_lme)
        
      ## Step 4 Fit ARMA Model   
        nuisance_arma_lme<-lme(outcome~time + nuisance,data=dat_nuisance,
                             random=~1|id, 
                             correlation = nuisance_arma,
                             na.action=na.omit) 
        summary(nuisance_arma_lme)
    
  ## Store Parameters  
      #Phi  
        nuisance_results$phi_ar[k]<-nuisance_ar_lme$modelStruct$corStruct %>% coef(.,unconstrained=FALSE)
        nal_temp<-nuisance_arma_lme$modelStruct$corStruct %>% coef(.,unconstrained=FALSE) 
        nuisance_results$phi_arma[k]<-nal_temp[1]
      #Among Individual Variation (SD)  
        nuisance_results$sd_ar[k]<-as.numeric(VarCorr( nuisance_ar_lme)[1,2])
        nuisance_results$sd_arma[k]<-as.numeric(VarCorr( nuisance_arma_lme)[1,2])
      #Time Beta
        nuisance_results$time_ar[k]<-fixef(nuisance_ar_lme)[2]
        nuisance_results$time_arma[k]<-fixef(nuisance_arma_lme)[2]
      #Nuisance Beta
        nuisance_results$nuisance_ar[k]<-fixef(nuisance_ar_lme)[3]
        nuisance_results$nuisance_arma[k]<-fixef(nuisance_arma_lme)[3]
      #Time P Value
        nuisance_results$time_ar_p[k]<-summary(nuisance_ar_lme)[[20]][2,5]
        nuisance_results$time_arma_p[k]<-summary(nuisance_arma_lme)[[20]][2,5]
      #Nuisance P Value 
        nuisance_results$nuisance_ar_p[k]<-summary(nuisance_ar_lme)[[20]][3,5]
        nuisance_results$nuisance_arma_p[k]<-summary(nuisance_arma_lme)[[20]][3,5]
        
        } #loop end

4.3 Power Calculations

  ## Power / False Positives
    nuisance_results %>% dplyr::select(time_ar_p,time_arma_p,nuisance_ar_p,nuisance_arma_p) %>% apply(.,2,function(x)mean(x<0.05))
##       time_ar_p     time_arma_p   nuisance_ar_p nuisance_arma_p 
##           0.015           0.476           0.009           0.055
  ## Parameter Estimates Means
    nuisance_results %>% dplyr::select(phi_ar,phi_arma,sd_ar,sd_arma,time_ar,time_arma,nuisance_ar,nuisance_arma) %>% apply(.,2,mean)
##        phi_ar      phi_arma         sd_ar       sd_arma       time_ar 
##   0.560065248   0.800032144   4.034958832   4.941458446   0.019772708 
##     time_arma   nuisance_ar nuisance_arma 
##   0.019737236   0.009211209   0.002952260

4.4 Plots

###P value Plot
    nuisance_p<-nuisance_results %>% dplyr::select(time_ar_p,time_arma_p,nuisance_ar_p,nuisance_arma_p) %>% pivot_longer(colnames(.),names_to="parameter",values_to="estimate") 
    nuisance_p$var<-"Time"
    nuisance_p$var[grep("nuisance",nuisance_p$parameter)]<-"Nuisance"
    nuisance_p$model<-"AR"
    nuisance_p$model[grep("arma",nuisance_p$parameter)]<-"ARMA"
    
    
    nuisance_p_plot1<-ggplot(nuisance_p) + geom_density(aes(x=estimate,fill=parameter),alpha=0.5) + facet_grid(var~model,scales="free")+theme_bw()+plotopts
    nuisance_p_plot2<- nuisance_p_plot1 + scale_fill_brewer(palette = "Set2") + guides(fill=F) + labs(y="Density",x="Estimate")
    nuisance_p_plot2

###Parameter Plot    
  ## Parameter Estimates Re-Format
    nuisance_estimates<-nuisance_results %>% dplyr::select(phi_ar,phi_arma,sd_ar,sd_arma,time_ar,time_arma,nuisance_ar,nuisance_arma) %>% pivot_longer(colnames(.),names_to="parameter",values_to="estimate")   
    
  #Add A Panel Indicator
    nuisance_estimates$panel<-"Phi"
    nuisance_estimates$panel[grep("sd",nuisance_estimates$parameter)]<-"Ind. Variation (SD)"
    nuisance_estimates$panel[grep("time",nuisance_estimates$parameter)]<-"Time Beta"
    nuisance_estimates$panel[grep("nuisance",nuisance_estimates$parameter)]<-"Nuisance Beta"
    nuisance_estimates$model<-"AR(1)"
    nuisance_estimates$model[grep("arma",nuisance_estimates$parameter)]<-"ARMA(2,1)"
    
    #Plot
      nuisance_plot1<-ggplot(nuisance_estimates,aes(x=model,y=estimate)) + stat_pointinterval(point_size=5,shape=21,aes(point_fill=model),interval_alpha=0.8) + facet_wrap(.~panel,scales="free")+theme_bw()+plotopts
      nuisance_plot2 <- nuisance_plot1 + labs(y="Estimate",x="Parameter") + guides(point_fill=F)
      nuisance_plot2

# ### Example Data
#         ## Step 1Draw Means For Each Goose
#         alphavals_nuisance<-rnorm(20,40,5)
#         
#       ## Step 2 Data Generation Using ARMA(2,1) Model + Slope for Time
#         dat_nuisance<-lapply(alphavals_nuisance,function(x) data.frame(yval= x + gen_gts(ARMA(ar= c(0.8,-0.7),ma=0.7, sigma2 = 50), n = n_nuisance) , time=seq(100),nuisance=runif(n_nuisance),id=paste0("id",which(alphavals_nuisance==x)))) %>% do.call("rbind",.)
#         dat_nuisance$outcome<- with(dat_nuisance,beta*time + Observed)
#         
#         
#       nuisance_eg1<-ggplot(dat_nuisance,aex(x=time,y=outcome)) + geom_line(aes(color-id)) + theme_bw() + plotopts + guides(color=F)
#       nuisance_eg2<- nuisance_eg1 + labs(x="Time",y=ylab)
      
    ####### COMBINED PLOT
      nuisance_combined<-plot_grid(nuisance_plot2,nuisance_p_plot2,labels=c("A","B"),label_size = 30)
      nuisance_combined

      #ggsave2('Nuisance Parameter Plot.pdf',nuisance_combined,width=16,height=8)
      #ggsave2('Nuisance Parameter Plot.tiff',nuisance_combined,width=16,height=8)

4.5 Low Sample Size Simulations

###############################################################      
######### NUISANCE 2 - Low SAMPLE SIZE ########
###############################################################      
  
      # Set seed for reproducibility
      set.seed(9)
      
      # Define sample size
      n_low = 100
      
      #Sim Parameters and Empty Matrix  
      nsims_low=1000 #sims
      
      # Autocorrelation Models
      non_ar1<-corARMA(c(0.5), p = 1, q = 0,form= ~time)
      non_arma<-corARMA(c(0.5,-0.5,0.5), p = 2, q = 1,form= ~time)
      
      #Store Data  
      low_results<-data.frame(matrix(NA,nrow=nsims_low,ncol=12))
      colnames(low_results)<-c("phi_ar","phi_arma","sd_ar","sd_arma","time_ar","time_arma","nuisance_ar","nuisance_arma","time_ar_p","time_arma_p","nuisance_ar_p","nuisance_arma_p")
      ## Loop Start 
      
      for(k in 1:nsims_low){
        
        ## Step 1Draw Means For Each Goose
        alphavals_low<-rnorm(5,40,5)
        
        ## Step 2 Data Generation Using ARMA(2,1) Model + Slope for Time
        dat_low<-lapply(alphavals_low,function(x) data.frame(yval= x + gen_gts(ARMA(ar= c(0.8,-0.7),ma=0.7, sigma2 = 50), n = n_low) , time=seq(100),nuisance=runif(n_low),id=paste0("id",which(alphavals_low==x)))) %>% do.call("rbind",.)
        dat_low$outcome<- with(dat_low,beta*time + Observed)
        
        #nrow(dat_nuisance)
        
      ## Step 3 Fit AR(1) Model
        low_ar_lme<-lme(outcome~time + nuisance,data=dat_low,
                             random=~1|id, 
                             correlation = nuisance_ar1,
                             na.action=na.omit) 
        #summary(nuisance_ar_lme)
        
        ## Step 4 Fit ARMA Model   
        low_arma_lme<-lme(outcome~time + nuisance,data=dat_low,
                               random=~1|id, 
                               correlation = nuisance_arma,
                               na.action=na.omit) 
        #summary(nuisance_arma_lme)
        
        ## Store Parameters  
        #Phi  
        low_results$phi_ar[k]<-low_ar_lme$modelStruct$corStruct %>% coef(.,unconstrained=FALSE)
        nal_temp<-low_arma_lme$modelStruct$corStruct %>% coef(.,unconstrained=FALSE) 
        low_results$phi_arma[k]<-nal_temp[1]
        #Among Individual Variation (SD)  
        low_results$sd_ar[k]<-as.numeric(VarCorr( low_ar_lme)[1,2])
        low_results$sd_arma[k]<-as.numeric(VarCorr( low_arma_lme)[1,2])
        #Time Beta
        low_results$time_ar[k]<-fixef(low_ar_lme)[2]
        low_results$time_arma[k]<-fixef(low_arma_lme)[2]
        #Nuisance Beta
        low_results$nuisance_ar[k]<-fixef(low_ar_lme)[3]
        low_results$nuisance_arma[k]<-fixef(low_arma_lme)[3]
        #Time P Value
        low_results$time_ar_p[k]<-summary(low_ar_lme)[[20]][2,5]
        low_results$time_arma_p[k]<-summary(low_arma_lme)[[20]][2,5]
        #Nuisance P Value 
        low_results$nuisance_ar_p[k]<-summary(low_ar_lme)[[20]][3,5]
        low_results$nuisance_arma_p[k]<-summary(low_arma_lme)[[20]][3,5]
        
      } #loop end
      
      
    ## Power / False Positives
      low_results %>% dplyr::select(time_ar_p,time_arma_p,nuisance_ar_p,nuisance_arma_p) %>% apply(.,2,function(x)mean(x<0.05))
##       time_ar_p     time_arma_p   nuisance_ar_p nuisance_arma_p 
##           0.000           0.136           0.016           0.061
    ## Parameter Estimates Means
      low_results %>% dplyr::select(phi_ar,phi_arma,sd_ar,sd_arma,time_ar,time_arma,nuisance_ar,nuisance_arma) %>% apply(.,2,mean)
##        phi_ar      phi_arma         sd_ar       sd_arma       time_ar 
##    0.56081643    0.79879834    3.42412645    4.55873003    0.01902454 
##     time_arma   nuisance_ar nuisance_arma 
##    0.01888894   -0.02135437    0.01690382
    ###P value Plot
      low_p<-low_results %>% dplyr::select(time_ar_p,time_arma_p,nuisance_ar_p,nuisance_arma_p) %>% pivot_longer(colnames(.),names_to="parameter",values_to="estimate") 
      low_p$var<-"Time"
      low_p$var[grep("nuisance",low_p$parameter)]<-"Nuisance"
      low_p$model<-"AR"
      low_p$model[grep("arma",low_p$parameter)]<-"ARMA"
      
      
      low_p_plot1<-ggplot(low_p) + geom_density(aes(x=estimate,fill=parameter),alpha=0.5) + facet_grid(var~model,scales="free")+theme_bw()+plotopts
      low_p_plot2<- low_p_plot1 + scale_fill_brewer(palette = "Set2") + guides(fill=F) + labs(y="Density",x="Estimate")
      low_p_plot2

    ###Parameter Plot    
      ## Parameter Estimates Re-Format
      low_estimates<-low_results %>% dplyr::select(phi_ar,phi_arma,sd_ar,sd_arma,time_ar,time_arma,nuisance_ar,nuisance_arma) %>% pivot_longer(colnames(.),names_to="parameter",values_to="estimate")   
      
      #Add A Panel Indicator
      low_estimates$panel<-"Phi"
      low_estimates$panel[grep("sd",low_estimates$parameter)]<-"Ind. Variation (SD)"
      low_estimates$panel[grep("time",low_estimates$parameter)]<-"Time Beta"
      low_estimates$panel[grep("nuisance",low_estimates$parameter)]<-"Nuisance Beta"
      low_estimates$model<-"AR(1)"
      low_estimates$model[grep("arma",low_estimates$parameter)]<-"ARMA(2,1)"
      
     #Plot
      low_plot1<-ggplot(low_estimates,aes(x=model,y=estimate)) + stat_pointinterval(point_size=5,shape=21,aes(point_fill=model),interval_alpha=0.8) + facet_wrap(.~panel,scales="free")+theme_bw()+plotopts
      low_plot2 <- low_plot1 + labs(y="Estimate",x="Parameter") + guides(point_fill=F)
      low_plot2

4.6 Combined Plot of Both Sets of Simulations

  ##### COMBINED PARAMETER PLOT
    #Combine  
      low_estimates$samplesize=5
      nuisance_estimates$samplesize=20
      low_combined<-rbind(low_estimates,nuisance_estimates)
      
      #Dataframe for Real Value Lines  
      low_dummy <- data.frame(panel = c("Ind. Variation (SD)", "Phi","Time Beta","Nuisance Beta"), Z = c(5, 0.8,0.02,0))
      
      
    #Plot  
      sample_combined_plot1<-ggplot(low_combined,aes(x=model,y=estimate)) + geom_hline(data = low_dummy, aes(yintercept = Z),lty="dashed") + stat_pointinterval(point_size=5,shape=21,aes(point_fill=factor(samplesize)),interval_alpha=0.8,position = position_dodge(width=0.5)) 
      sample_combined_plot2 <- sample_combined_plot1 +  facet_wrap(.~panel,scales="free")+theme_bw()+plotopts 
      sample_combined_plot3 <-  sample_combined_plot2 + labs(y="Estimate",x="Parameter",point_fill="Sample Size") + theme(legend.position = "top",legend.text = element_text(size=20),legend.title = element_text(size=20)) 
      sample_combined_plot3

  ######### MEGA COMBINED PLOT 
      
       nuisance_low_combined<-plot_grid(sample_combined_plot3,nuisance_p_plot2,low_p_plot2,labels=c("A","B","C"),label_size = 30,ncol=3)
      nuisance_low_combined

      #ggsave2('Nuisance Low Parameter Plot.pdf',nuisance_low_combined,width=22,height=8)
     # ggsave2('Nuisance Low Parameter Plot.tiff',nuisance_low_combined,width=22,height=8)

Section 5 Power Analysis

5.1 Example Data

      set.seed(123)

  ####### Single Iteration Testing - Assume 5 Per Sex
      x_per_single<-5

     # Model for Timne Series -  AR1 with Phi 0.5 + Lots of Variability in AR1 and White Noise Processes
      power_time_model_single<- AR1(phi = .5, sigma2 = 100) + WN(sigma2=100)
      
    #Draw Intercepts for Each Bird, with Sex-Specific Distribution  
      alphavals_single<-c(rnorm(x_per_single,60,30),rnorm(x_per_single,80,30))
      
    #Generate Vector of Data for each bird and collapse to data frame  
      dat_single<-lapply(alphavals_single,function(x) data.frame(yval= x + gen_gts(n, model=power_time_model_single),time=seq(n),id=paste0("id",which(alphavals_single==x)))) %>% do.call("rbind",.)
      dat_single$sex<-rep(rep(c("M","F"),each=x_per_single),each=n) #add sex
      
    ## Step 3 Fit Model 
      # power_ar1<-corARMA(c(0.5), p = 1, q = 0,form= ~time)
      # power_lme<-lme(Observed~time+sex,data=dat_single,
      #                   random=~1|id, 
      #                   correlation = power_ar1,
      #                   na.action=na.omit) 
      # summary(power_lme)

5.2 Plot of Pengwing Data

    #Phylopic of Seabird
      pengwing <- name_search(text = "Pygoscelis papua", options = "namebankID")[[1]]
      pengwing_img<-image_data(name_images(uuid = pengwing$uid[1])$same[[1]]$uid,size=1024)[[1]]
      
    #Example Plot   
      dive_eg1<-ggplot(dat_single,aes(y=Observed,x=time)) + geom_line(aes(color=id),cex=1.4) + facet_wrap(.~sex)
      dive_eg2<- dive_eg1 + theme_bw() + plotopts + labs(y="Dive Depth (m)",x="Time") + guides(color=F) + add_phylopic(pengwing_img,x=10,y=160,alpha=1,color="black",ysize = 30)
      dive_eg2

5.3 Power Analysis Loop

  ####### Looped Power Analysis
      
      x_per_sex<-seq(5,50,5)    #individuals of each sex for each  step 
      n=100                     # values per time series
      nsims=1000                  #sims per sample size
      
      #Empty Matrix
          powercalc_mat1<-matrix(NA,ncol=nsims,nrow=length(x_per_sex))
          
      #Time Series Model - Real Data
        power_time_model<- AR1(phi = .5, sigma2 = 100) + WN(sigma2=100)
        
      # Time Series Model - Analytical Model  
        power_ar1<-corARMA(c(0.5), p = 1, q = 0,form= ~time)
        
      for(k in 1:length(x_per_sex)){
        for(j in 1:nsims){
        
        
    ## Step 1 Generate Intercept Data for each Individual from Common Distribution    
      #Mean Values For Each Individual 
      alphavals2<-c(rnorm(x_per_sex[k],60,30),rnorm(x_per_sex[k],80,30))
      alphavals2
        
    ## Step 2 Generate Time Series  
        dat2<-lapply(alphavals2,function(x) data.frame(yval= x + gen_gts(n, model=power_time_model),time=seq(n),id=paste0("id",which(alphavals2==x)))) %>% do.call("rbind",.)
        dat2$sex<-rep(rep(c("M","F"),each=x_per_sex[k]),each=n)
        
    ## Step 3 Fit Model 
        ## Step 3 Fit Model 
        power_lme_iter<-lme(Observed~time+sex,data=dat2,
                       random=~1|id, 
                       correlation = power_ar1,
                       na.action=na.omit,method="ML") 
        power_lme_null<-lme(Observed~time,data=dat2,
                            random=~1|id, 
                            correlation = power_ar1,
                            na.action=na.omit,method="ML") 
        
        
    ##Extract T Value    
        powercalc_mat1[k,j]<-anova(power_lme_iter,power_lme_null)[[9]][2]
        }
          }

5.4 Calculate Power

    ###### Assemble Data     
       power1data<-data.frame(samplesize=x_per_sex,power= apply(powercalc_mat1,1,function(x)mean(x<=0.05)))

5.5 Plot of Power by Sample Size

    #### Plot Power   
       power1<- ggplot(power1data,aes(x=samplesize,y=power)) +  geom_hline(yintercept = 0.8,lty="dashed") + geom_smooth(method = "lm",formula = 'y~poly(x,2)') +  geom_point(shape=21,size=5,fill="lightblue") + theme_bw() + plotopts + labs(x="Sample Size Per Group (Sex)",y="Power")
        
        
    ### Combined With Eg DATA    
       power_combined<-plot_grid(dive_eg2,power1,labels=c("A","B"),label_size = 30)
       power_combined

    ## Save
       #ggsave2('Power Curve Combined Plot.pdf',power_combined,width=16,height=8)